
/*-----------------------------------------------*
Name: Haqdarshak Pilot Intervention-Cleaning
Date Created: 30 July, 2018
Date Last Modified: 28 April, 2021
Created by: Aaron Berman and modified by Daniela Paz and Saumya Mathur on Nov.13/2020 
Description: Clean raw data to create an intermediate scheme-count file
*-------------------------------------------------*/



*..................................................
** User-written commands: install from SSC any that `which' cannot locate
local needed_pkgs mdesc nmissing veracrypt
foreach pkg of local needed_pkgs {
	cap which `pkg'
	// only a "not found" return code (111) triggers an install
	if (_rc == 111) {
		ssc install `pkg'
	}
}
*..................................................
** Session setup: empty memory, fix the interpreter version, no paging,
** pause disabled, and close any log that is still open
clear all
version 12.0
set more off
pause off
capture log close




//open log 
*cd "$logs"
*log using "cleaning_scheme_application.smcl", replace

	
	//open full dataset with listing of tracking sheet observations
	cd "$pilot"
	use "pilot_intervention_tracking_sheets_all", clear 

	//merge in completed applications data
	//(m:1 requires member_id scheme_id to be unique in the completed-applications file)
	merge m:1 member_id scheme_id using "pilot_intervention_completed_apps.dta"
	tab _merge
	//drop completed applications with no tracking-sheet row
	//(original note: there should be 2 observations here that don't merge)
	drop if _merge == 2
	//only master-only (1) and matched (3) rows remain, so "matched" is the applied flag
	gen applied_scheme = (_merge == 3)

	//checkpoint that still carries _merge, available for inspection at the pause below
	//(path quoted so a temp directory containing spaces does not break the save)
	tempfile schemes_all_main
	save "`schemes_all_main'", replace 
	pause

	preserve
	//collapse to one observation per household-scheme pair
	collapse (sum) num_agree_hh=agreed_scheme_amount num_apply_hh=applied_scheme (count) num_offer_hh=row, by(household_id scheme_id)
	//indicators: at least one offer / agreement / application within the household-scheme pair
	gen atleast1_offered_hh = (num_offer_hh > 0 & !missing(num_offer_hh))
	gen atleast1_agreed_hh = (num_agree_hh > 0 & !missing(num_agree_hh))
	gen atleast1_applied_hh = (num_apply_hh > 0 & !missing(num_apply_hh))

	save "$intermediate_data/scheme_applications_hhlevel", replace 
	restore

	//back on the row-level data: _merge is no longer needed
	drop _merge 

	//overwrite the tempfile with the final version (without _merge) for later use
	save "`schemes_all_main'", replace 



	//start with the household-level file (hh_data_treatment_b)
	use "$pilot/hh_data_treatment_b", clear 

	//create other relevant household-level variables
	//disability: row sum over all disabled_* variables (rowtotal treats missing as 0)
	egen num_disabled = rowtotal(disabled_*)
	gen any_disability_hh = (num_disabled > 0) if !missing(num_disabled)

	//1 when member 1 has gender code 1; any other value, including missing, is coded 0
	gen member_male_1 = (member_gender_1 == 1)

	keep household_id any_disability_hh above_48000 enter_annual_income caste* applied_for_govt_scheme member_male_1
	//household_id is converted to string before saving
	tostring household_id, replace
	//quote the tempfile path so a temp directory containing spaces does not break the save
	tempfile baseline_characteristics
	save "`baseline_characteristics'", replace 

	//get village & stratum designations 
	use "$pilot/hh_data_treatment_b", clear 
	keep household_id village_id stratum
	tostring household_id, replace
	tempfile village_strata
	save "`village_strata'", replace 


	**********************************************************
	** household attributes to attach to every individual record
	** (no preserve/restore needed: the data in memory is replaced by the next -use-)
	use "$pilot/hh_data_treatment_b", clear 
	keep household_id annual_income housekind_kuccha housekindobserve_kuccha enter_annual_income
	tempfile hh_attributes
	save "`hh_attributes'", replace
	
	use "$pilot/individual_data_treatment_b", clear 

	** merge with household attributes
	** assert(match): stop with an error unless every observation on both sides matches
	** nogenerate: do not create _merge
	merge m:1 household_id using "`hh_attributes'", assert(match) nogenerate

	** drop records where both age and gender are missing
	drop if missing(enter_years_) & missing(member_gender_)


	** 3g age
	rename enter_years_ age
	label var age "Age"

	** check age of first respondent (member_num is compared as a string)
	** the first respondent must have a non-missing age of at least 18
	tab age if member_num == "1", m
	assert age >= 18 & !missing(age) if member_num == "1"
	sum age if member_num == "1", d


	** 3h gender: code 1 -> male = 1, code 2 -> male = 0, anything else stays missing
	gen male = 1 if member_gender_ == 1
	replace male = 0 if member_gender_ == 2
	tab male, m
	label var male "Male"

	** 3i literacy: 1 when read_write_ == 1; every other value, including missing, is coded 0
	gen literate = read_write_ == 1
	tab literate, m
	label var literate "Can read or write"

	** 3k enrolled in school
	rename enrolled_in_school_ enrolled_in_school
	tab enrolled_in_school, m
	label var enrolled_in_school "Currently enrolled in school or training"

	** 3l marital status
	rename member_marital_status_ marital_status

	** 3m occupation
	rename occupation_ occupation

	** 3n income in last month (nothing done here)

	** 3o hours worked last week (nothing done here)

	** 3p have documents (nothing done here)

	** 3q bank account (nothing done here)

	** 3r disabled
	rename disabled_ disabled
	tab disabled, m
	label var disabled "Disabled"

	** 3s disability percentage (nothing done here)

	** 3t adopted orphan
	rename adopted_orphan_ adopted_orphan

	** 3u mother separated/divorced/widowed/remarried widow
	rename mother_widowed_ mother_widowed

	** 3v mother or father disabled
	rename mother_father_disabled_ mother_father_disabled


	****************
	** eligibility indicators for the top 8 schemes in Rajasthan
	** each indicator is built in one step from a logical expression (1 = true, 0 = false)
	****************

	** gender must be known for every individual; the age check is left disabled
	assert !missing(male)
	*assert !missing(age)

	** 1: females with a known age of 10 or under
	gen elig_sukanya = (male == 0 & !missing(age) & age <= 10)
	tab elig_sukanya, m

	** 2: everyone is coded eligible
	gen elig_postoffice = 1
	tab elig_postoffice, m

	** 3: age 18-60 with an occupation code from 26 to 37
	** (inrange() is 0 when age is missing, as in an explicit >= / <= comparison)
	gen elig_laborcard = (inrange(age, 18, 60) & inlist(occupation, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))
	tab elig_laborcard, m

	** 4: adults with housekind_kuccha == 1 (flagged "not done" by the original author)
	gen elig_awas_yojana = (housekind_kuccha == 1 & !missing(age) & age >= 18) // not done
	tab elig_awas_yojana, m

	** 5: income below 48000 and age 58+ for males or 55+ for females
	gen elig_oldage_pension = (enter_annual_income < 48000 & !missing(age) & ((male == 1 & age >= 58) | (male == 0 & age >= 55)))
	tab elig_oldage_pension, m

	** 6: left missing - this was too sensitive, so we did not ask these questions
	gen elig_palanhaar = .
	tab elig_palanhaar, m

	** 7: adult females, income below 48000, marital status code 3, 4, 5 or 6
	gen elig_widow_pension = (male == 0 & !missing(age) & age >= 18 & enter_annual_income < 48000 & inlist(marital_status,3,4,5,6))
	tab elig_widow_pension, m

	** 8: age 18-40
	gen elig_atalpension = inrange(age, 18, 40)
	tab elig_atalpension, m


	****************
	** cross each scheme's eligibility flag with its has_<scheme> flag
	****************

	** strip the trailing underscore from the has_* variables
	rename has_*_ has_*

	local schemes sukanya postoffice laborcard oldage_pension widow_pension awas_yojana atalpension
	foreach s of local schemes {

		** among the eligible: 1 if has the scheme, else 0 (missing for everyone else)
		gen has_elig_`s' = (has_`s' == 1) if elig_`s' == 1

		** among the eligible: 1 if has_<scheme> is 0, else 0
		gen doesnt_have_elig_`s' = (has_`s' == 0) if elig_`s' == 1

		** among the not eligible: 1 if has the scheme, else 0
		gen has_not_elig_`s' = (has_`s' == 1) if elig_`s' == 0

		** 1 if not eligible, 0 if eligible, missing for any other elig value
		gen not_elig_`s' = (elig_`s' == 0) if inlist(elig_`s', 0, 1)

		** exactly one of has_elig / has_not_elig must be defined for every observation
		assert missing(has_elig_`s') != missing(has_not_elig_`s')
	}


save "$intermediate_data/individual_scheme_counts", replace
